# -*- coding: utf8 -*-
import re
import common
from BeautifulSoup import BeautifulSoup

# Identification strings for this site module.
# NOTE(review): presumably read by the code that loads site modules and
# matches URLs against site_keyword -- confirm against the caller.
site_index = 'anicashi'
site_keyword = 'anicashi'

# Root URL of the site, and a sample song page under it.
site_url = 'http://www.anicashi.jp/'
test_url = site_url + 'songs/003/007003.html'

def get_lyric(url):
    """Fetch the lyric for an anicashi song page.

    The numeric song id is taken from the ``/<digits>.html`` part of *url*
    and appended to the ``make_song_flash.php?scd=`` endpoint.  In the
    response, the lyric is the run of bytes between the start marker (byte
    0x14 followed by two NUL bytes) and the next NUL byte.  That run is
    decoded as UTF-8, stripped, passed through ``common.half2full`` and
    prefixed with the header built by ``_get_song_info(url)``.

    Args:
        url: address of the song page.

    Returns:
        The header plus lyric text, or None when *url* contains no song id,
        the response is empty, or the start marker is absent.
    """
    prefix = 'http://www.anicashi.jp/make_song_flash.php?scd='

    match = re.search(r'/([0-9]+)\.html', url)
    if not match:
        return None

    song_id = match.group(1)
    data = common.get_url_content(prefix + song_id)
    # NOTE(review): assumes get_url_content returns a byte string; an empty
    # or None result is treated as "no lyric" -- confirm against common.
    if not data:
        return None

    # strip the useless part
    start_str = '\x14\0\0'
    end_str = '\0'
    pos = data.find(start_str)
    if pos < 0:
        # Without the marker there is no lyric payload to slice out.
        return None
    start = pos + len(start_str)
    end = data.find(end_str, start)
    if end < 0:
        # No terminator: keep everything to the end of the data instead of
        # slicing with -1, which would drop the final byte.
        end = len(data)

    lyric = unicode(data[start:end], 'utf8').strip()

    # test for half to full
    lyric = common.half2full(lyric)

    return _get_song_info(url) + lyric

def _get_song_info(url):
    """Build the header text that is placed in front of the lyric.

    Downloads *url*, decodes it as UTF-8 (undecodable bytes are dropped),
    parses it with BeautifulSoup and reads the text of the first <h1> and
    of every <h2> on the page.

    Args:
        url: address of the song page.

    Returns:
        A unicode string laid out as: the <h1> text, a blank line, one line
        per <h2>, then three newlines.  A missing or empty tag contributes
        an empty string.
    """
    data = common.get_url_content(url)
    data = data.decode('utf8', 'ignore')

    soup = BeautifulSoup(data)

    # title
    title = _tag_text(soup.find('h1'))

    # artist, lyric, music
    info = u'\n'.join([_tag_text(h2) for h2 in soup.findAll('h2')])

    return u'%s\n\n%s\n\n\n' % (title, info)


def _tag_text(tag):
    """Return the text of a BeautifulSoup tag, or u'' when *tag* is None."""
    if tag is None:
        return u''
    if tag.string is not None:
        return tag.string
    # .string is None when the tag has no children or more than one; fall
    # back to joining all text nodes found under the tag.
    return u''.join(tag.findAll(text=True))
